# Process FEEM data
# Prepare the absorbance data file name and the dilution data file name
# Choose interpolation, slope parameters and PARAFAC settings

# Update R
# install.packages("installr")
# library(installr)
# updateR()

# Working directory (WD) per machine
# cpu:    C:/Users/koenv/OneDrive - WageningenUR/PHD onedrive/2. Methods/FEEM/data processing
# laptop: C:/Users/koen/OneDrive - WageningenUR/PHD onedrive/2. Methods/FEEM/data processing
# wur:    C:/Users/gijn004/OneDrive - WageningenUR/PHD onedrive/2. Methods/FEEM/data processing
# Working directory for this machine (alternatives for other machines are
# listed in the comments above).
# NOTE(review): the quotes and path separators were missing in the source and
# have been reconstructed - confirm the folder boundaries before running.
path <- "C:/Users/koenv/OneDrive - WageningenUR/PHD onedrive/2. Methods/FEEM/data processing"
setwd(path)

#install package and Load library

# install.packages("staRdom")
# install.packages("tidyr")
# install.packages("dplyr")
library(staRdom)
library(tidyr)
library(dplyr)

# Load FEEM data: read every EEM csv file found in the "csv data" folder
# (relative to the working directory).
# NOTE(review): the folder name was unquoted in the source - confirm it.
pathcsv <- "csv data"
eem_list <- eem_read(pathcsv, import_function = eem_csv)

# Number of physical CPU cores, passed to the parallelised steps below
cores <- detectCores(logical = FALSE)


# 1.1 Change sample names
# Import new file-name list
# (disabled; presumably replaces "(FD3)" in the sample names with "")
# eem_list <- eem_name_replace(eem_list, c("\\(FD3\\)"), c(""))

# 1.2 Absorbance baseline correction, based on high wavelengths
# NOTE(review): the file path was unquoted and without separators in the
# source - confirm "abs dil/absorbance file.csv" is the intended location.
abs_list <- absorbance_read("abs dil/absorbance file.csv")
# abs_list <- abs_blcor(abs_list, wlrange = c(680, 700))

# 1.3 Spectral correction (instrument-specific correction); makes bottom part of graph grey
# excorfile <- system.file("extdata/CorrectionFiles/xc06se06n.csv", package = "staRdom")
# Excor <- data.table::fread(excorfile)
# emcorfile <- system.file("extdata/CorrectionFiles/mcorrs_4nm.csv", package = "staRdom")
# Emcor <- data.table::fread(emcorfile)
# eem_list <- eem_spectral_cor(eem_list, Excor, Emcor)

# 1.4 Interpolation and blank subtraction: the blank (a sample whose name
# contains "blank" or "mq") in the same subfolder is subtracted.
eem_list <- eem_extend2largest(
  eem_list,
  interpolation = 1,
  extend = FALSE,
  cores = cores
)
eem_list <- eem_remove_blank(eem_list)

# 1.5 Inner filter effect correction: use the absorbance measurements to correct the emission measurements for absorbance
# eem_list <- eem_ife_correction(eem_list, abs_list, cuvl = 1)

# 1.6 Raman normalisation (standardise for fluorimeter differences based on
# the blank); blank = "blank" tells the function to use the blank samples.
eem_list <- eem_raman_normalisation2(eem_list, blank = "blank")

# 1.7 Remove blanks from eem_list and abs_list
# Drop every EEM sample whose name matches one of the blank patterns.
eem_list <- eem_extract(
  eem_list,
  c("nano", "miliq", "milliq", "mq", "blank"),
  ignore_case = TRUE
)
# Drop the matching blank columns from the absorbance table. The table read
# in step 1.2 is called abs_list (the original line referred to an undefined
# object `absorbance`), and abs_list is what abs_parms() uses later on.
abs_list <- dplyr::select(
  abs_list,
  -matches("nano|miliq|milliq|mq|blank", ignore.case = TRUE)
)

# 1.8 Remove Rayleigh and Raman scattering and interpolate the removed bands.
# One flag and one width (15) per scatter band; the first of the four bands
# is left in place (FALSE), the other three are removed.
remove_scatter <- c(FALSE, TRUE, TRUE, TRUE)
remove_scatter_width <- c(15, 15, 15, 15)
eem_list <- eem_rem_scat(
  eem_list,
  remove_scatter = remove_scatter,
  remove_scatter_width = remove_scatter_width
)
eem_list <- eem_interp(eem_list, cores = cores, type = 1, extend = FALSE)

# 1.9 Correct for dilutions
# dil_list <- read.table("abs dil/dilution file.csv", header = TRUE, sep = ",", dec = ".", row.names = 1)
# eem_list <- eem_dilution(eem_list, dil_list)

# 1.10 Smooth data (not advised for PARAFAC); the smoothed copy is only used
# for peak picking and indices, eem_list itself stays unsmoothed.
# n is passed as a named argument instead of assigning it inside the call.
eem4peaks <- eem_smooth(eem_list, n = 4)

# 1.11 Plot
summary(eem_list)

# Overview with 6 samples per page, drawn as contour plots
eem_overview_plot(eem_list, spp = 6, contour = TRUE) # ggeem(eem_list, redneg = TRUE)

# Overview with 26 samples per page, re-faceted into 4 columns with custom
# axis labels.
# NOTE(review): confirm which axis holds excitation and which emission in the
# plots returned by eem_overview_plot(); the labels below are kept as written.
eem_overview_plot(eem_list, spp = 26) %>%
  lapply(function(plot) {
    plot +
      facet_wrap(~sample, ncol = 4) +
      labs(x = "Emission (nm)", y = "Excitation (nm)")
  })

# 2.1 Peak picking and indices (calculated on the smoothed EEMs)
bix <- eem_biological_index(eem4peaks)
coble_peaks <- eem_coble_peaks(eem4peaks)
fi <- eem_fluorescence_index(eem4peaks)
hix <- eem_humification_index(eem4peaks, scale = TRUE)

# Combine all indices into one table, one row per sample
indices_peaks <- bix %>%
  full_join(coble_peaks, by = "sample") %>%
  full_join(fi, by = "sample") %>%
  full_join(hix, by = "sample")

indices_peaks
# NOTE(review): the output path was reconstructed from a separator-less
# string - confirm that the "Output" folder exists in the working directory.
write.csv(indices_peaks, "./Output/Indices peaks.csv")

# 2.2 Absorbance indices (slope parameters), calculated with cuvl = 1
slope_parms <- abs_parms(abs_list, cuvl = 1, cores = cores)

slope_parms
# NOTE(review): the output path was reconstructed from a separator-less
# string - confirm that the "Output" folder exists in the working directory.
write.csv(slope_parms, "./Output/Slope parameters.csv")


# 3 PARAFAC
# 3.1 Explore dataset, compare models with different numbers of components
# NOTE(review): data(pf_models) loads a data set into the workspace, presumably
# example models shipped with staRdom - confirm it is still wanted when
# processing your own data.
data(pf_models)

# Minimum and maximum number of components
dim_min <- 3
dim_max <- 7

nstart <- 25 # number of similar models from which the best is chosen
maxit <- 5000 # maximum number of iterations in PARAFAC analysis
ctol <- 10^-6 # tolerance in PARAFAC analysis

# Calculate PARAFAC models, one for each number of components.
# Unconstrained variant (disabled):
# pf1 <- eem_parafac(eem_list, comps = seq(dim_min, dim_max), normalise = FALSE,
#                    const = c("uncons", "uncons", "uncons"), maxit = maxit,
#                    nstart = nstart, ctol = ctol, cores = cores)

# Same model but using non-negative constraints
pf1n <- eem_parafac(
  eem_list,
  comps = seq(dim_min, dim_max),
  normalise = FALSE,
  const = c("nonneg", "nonneg", "nonneg"),
  maxit = maxit,
  nstart = nstart,
  ctol = ctol,
  cores = cores
)

# Rescale B and C modes to a maximum fluorescence of 1 for each component.
# The pf1 rescale is disabled together with the pf1 fit above; re-enable both
# lines together, otherwise pf1 is not a model fitted on eem_list.
# pf1 <- lapply(pf1, eempf_rescaleBC, newscale = "Fmax")
pf1n <- lapply(pf1n, eempf_rescaleBC, newscale = "Fmax")

# Compare the different models
eempf_compare(pf1n, contour = TRUE)
#The single plots can be created using eempf_fits and eempf_plot_comp


# 3.2 Check the correlation between different components. Components should
# not be correlated; large variation in DOC concentrations can cause
# correlation.
# The index in [[ ]] is the position of the model in the list, not its number
# of components.
eempf_cortable(pf1n[[1]], normalisation = FALSE)

eempf_corplot(pf1n[[1]], progress = FALSE, normalisation = FALSE)

# Same non-negative models, now fitted on normalised data
pf2 <- eem_parafac(
  eem_list,
  comps = seq(dim_min, dim_max),
  normalise = TRUE,
  const = c("nonneg", "nonneg", "nonneg"),
  maxit = maxit,
  nstart = nstart,
  ctol = ctol,
  cores = cores
)

# Rescale B and C modes
pf2 <- lapply(pf2, eempf_rescaleBC, newscale = "Fmax")

# eempf_compare(pf2, contour = TRUE) # use this to show the same plot as above
# For now, we are happy with just the components
eempf_plot_comps(pf2, contour = TRUE, type = 1)

# 3.3 Find and exclude outliers (leverage)

# Calculate leverage for the first model in pf2
cpl <- eempf_leverage(pf2[[1]])

# Plot leverage (nice plot)
eempf_leverage_plot(cpl, qlabel = 0.1)

# Plot leverage, not so nice plot but interactive to select which wavelengths
# and samples to exclude. Note which samples you exclude!
# The selection is saved in `exclude`.
exclude <- eempf_leverage_ident(cpl, qlabel = 0.1)

# Build the reduced data set from the selection. This step was missing, so
# eem_list_ex did not exist when the models below were fitted.
eem_list_ex <- eem_exclude(eem_list, exclude)

# Refit the models on the data without the excluded samples / wavelengths
pf3 <- eem_parafac(
  eem_list_ex,
  comps = seq(dim_min, dim_max),
  normalise = TRUE,
  maxit = maxit,
  nstart = nstart,
  ctol = ctol,
  cores = cores
)
pf3 <- lapply(pf3, eempf_rescaleBC, newscale = "Fmax")

# Check the leverage again after the exclusion
eempf_leverage_plot(eempf_leverage(pf3[[1]]), qlabel = 0.1)

# 3.4 Examine residuals of the first model in pf3 against eem_list;
# only the residuals are plotted, 6 samples per page, as contour plots.
eempf_residuals_plot(
  pf3[[1]],
  eem_list,
  residuals_only = TRUE,
  select = c(),
  spp = 6,
  cores = cores,
  contour = TRUE
)

# 3.5 Recalculate the model with increased accuracy
ctol <- 10^-8 # decrease tolerance in PARAFAC analysis
nstart <- 50 # increase number of random starts
maxit <- 10000 # increase number of maximum iterations

# Fit only the chosen 3-component model; output = "all" is requested here
# because the convergence check that follows works on this model.
pf4 <- eem_parafac(
  eem_list_ex,
  comps = 3,
  normalise = TRUE,
  const = c("nonneg", "nonneg", "nonneg"),
  maxit = maxit,
  nstart = nstart,
  ctol = ctol,
  output = "all",
  cores = cores
)

pf4 <- lapply(pf4, eempf_rescaleBC, newscale = "Fmax")


# Check convergence and leverage (outliers) of the model with increased
# accuracy.
# NOTE(review): the comparison sign was lost in the original comment;
# presumably a sample leverage below 0.1 is considered good - confirm.
eempf_convergence(pf4[[1]])
# [[1]] selects the first model in pf4; pf4 was fitted with comps = 3, so this
# is the 3-component model.
eempf_leverage_plot(eempf_leverage(pf4[[1]]))

## 4 When satisfied with the improved model
# Visualise the calculated components and their loadings
eempf_comp_load_plot(pf4[[1]], contour = TRUE)

# Visualise the components in the different samples
eempf_plot_comps(pf4[1], type = 2) # this function can be used to view the B- and C-modes
# Separate plots can be generated by using ggeem for components and
# eempf_load_plot for the loadings.
# It is possible to view the components in 3D using eempf_comps3D.

# Plot components in each sample, residual and whole sample, for samples
# 10 to 14 (the range operator was missing, leaving the single index 1014).
eempf_residuals_plot(
  pf4[[1]],
  eem_list,
  select = eem_names(eem_list)[10:14],
  cores = cores,
  contour = TRUE
)
## [[1]]

# model validation and formating
